#delimit ;

** Session setup: commands end at a semicolon from here on, output is not paused page by page, and the log is written as plain text;
set more off;
set logtype text;

** Close any log that is still open (for example after an earlier run stopped on an error), because "log using" fails while a log is open;
** the capture prefix keeps this from being an error when no log is open;
capture log close;

log using /home/dnc2101/Accidental_Deaths/Vital_Stats/Summary_Stats_Types_Accidents/make_age_group_vars_99_04.log, replace;




/*********************************************************************************************************************
*
* This do file takes the Vital Stats Mortality data, and creates counts of the number of accidental deaths, other
* than auto accidents and drug overdoses, that occur amongst different age groups in each state in each year.  These
* counts are the numerators for the dependent variables in the regressions shown in Table 5 of the Joint and Several
* Liability and Accidental Deaths paper.  This do file creates these counts for the years 1999 through 2004, the
* years of the data that use ICD-10 coding on cause of death.  A separate do file does the same exact task for the
* years 1981 through 1998, the years of data which use ICD-9 coding on cause of death.
*
*
* Program by Dan Carvell, written between Fall 2008 and Spring 2010.
*
*********************************************************************************************************************/




use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/Injury_Deaths_99_04.dta", clear;


/* Restrict the file to accidental deaths other than drug overdoses and auto accidents by removing, in turn,      */
/* non-injury deaths, drug overdoses, intentional injuries, injuries of unknown intent, and auto accidents.       */
/* The restricted file is then saved, so that each age-group section further down can reload it instead of        */
/* repeating these drops, which saves time when building the group-specific counts.                               */


** anything that is not an injury death goes first;
keep if ucr358>=381;

** drug overdoses;
drop if ucr358==420;

** intentional injuries and injuries of unknown intent;
drop if inrange(ucr358, 424, 452);

** codes 453, 454, 455 and 456 are adverse effects and stay in the file, and nothing should be coded above 456;
drop if ucr358>456;

** auto accidents;
drop if inrange(ucr358, 385, 397);


** look at the cause codes that remain, to confirm that the drops above did what was intended;
summarize ucr358;
tabulate ucr358;


** the state FIPS code is called fipsstr in this dataset, and the other datasets call it statefip, so make a copy under that name;
generate statefip = fipsstr;

sort statefip year;
compress;


** this file now holds only deaths due to accidents other than auto accidents and drug overdoses;
save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/accidents_no_auto_no_OD_99_04.dta", replace;







** Now I can construct the counts of accidental deaths within the different demographic groups;
** I must make sure the names of the variables I construct here aren't exactly the same as the names of the variables for the state-year populations ;
** in different demographic groups, the denominator of the rate, which were made from the Census data;

** Some notes about the coding of the age variables in this data;
** decedents aged 0 to 364 days are "ager52" between 01 and 22, inclusive;
** decedents aged 0 to 364 days are also "age" between 200 and 699;
** decedents aged 1 to 99 years are "age" between 1 and 99;
** decedents aged 100 years and above are "age" between 100 and 199;
** for 2003 and later, the code below instead selects ages in years as "age" equal to 1000 plus the age, for example 1018 to 1064 for ages 18 to 64;







** Decedents aged 0 to 5;
** the population variable for the denominator for this rate is called "ages_0_to_5";

** reload the filtered deaths file, as each of the other age-group sections does, so that this section does not depend on what is in memory;
use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/accidents_no_auto_no_OD_99_04.dta", clear;

** flag each death in the group with a 1: infants are found through ager52 (01 to 22) in every year,
   and ages 1 to 5 are "age" 1 to 5 before 2003 and "age" 1001 to 1005 from 2003 on;
gen ages_0_to_5_count =
      ( (inrange(ager52, 1, 22) | inrange(age, 1, 5))       & year<2003  )
    | ( (inrange(ager52, 1, 22) | inrange(age, 1001, 1005)) & year>=2003 );

** add up the flags, leaving one row per state and year;
collapse (sum) ages_0_to_5_count, by(statefip year);

** the label is attached after the collapse, because collapse gives each result variable a label of the form "(sum) varname",
   which would overwrite a label set beforehand;
label variable ages_0_to_5_count "count of accidents amongst ages 0 to 5";

** sort the data so that the merge with other data goes OK;
sort statefip year;

** now save the data;
save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_0_to_5_Counts_99_04.dta", replace;





** Now, decedents aged 18 to 64;
** the population variable for the denominator for this rate is called "ages_18_to_64";

use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/accidents_no_auto_no_OD_99_04.dta", clear;

** flag each death in the group with a 1: "age" 18 to 64 before 2003, and "age" 1018 to 1064 from 2003 on;
gen ages_18_to_64_count =
      ( inrange(age, 18, 64)     & year<2003  )
    | ( inrange(age, 1018, 1064) & year>=2003 );

** add up the flags, leaving one row per state and year;
collapse (sum) ages_18_to_64_count, by(statefip year);

** the label is attached after the collapse, because collapse gives each result variable a label of the form "(sum) varname",
   which would overwrite a label set beforehand;
label variable ages_18_to_64_count "count of accidents amongst ages 18 to 64";

** sort the data so that the merge with other data goes OK;
sort statefip year;

** now save the data;
save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_18_to_64_Counts_99_04.dta", replace;




** Now, decedents aged 6 to 17;
** the population variable for the denominator for this rate is called "ages_6_to_17";

use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/accidents_no_auto_no_OD_99_04.dta", clear;

** flag each death in the group with a 1: "age" 6 to 17 before 2003, and "age" 1006 to 1017 from 2003 on;
gen ages_6_to_17_count =
      ( inrange(age, 6, 17)      & year<2003  )
    | ( inrange(age, 1006, 1017) & year>=2003 );

** add up the flags, leaving one row per state and year;
collapse (sum) ages_6_to_17_count, by(statefip year);

** the label is attached after the collapse, because collapse gives each result variable a label of the form "(sum) varname",
   which would overwrite a label set beforehand;
label variable ages_6_to_17_count "count of accidents amongst ages 6 to 17";

** sort the data so that the merge with other data goes OK;
sort statefip year;

** now save the data;
save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_6_to_17_Counts_99_04.dta", replace;






** Now decedents aged 65 and above;
** the population variable for the denominator for this rate is called "age_65_or_above";

use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/accidents_no_auto_no_OD_99_04.dta", clear;

** flag each death in the group with a 1: "age" 65 to 199 before 2003 (100 to 199 being the codes for ages 100 and above),
   and "age" 1065 to 1150 from 2003 on;
gen age_65_or_above_count =
      ( inrange(age, 65, 199)    & year<2003  )
    | ( inrange(age, 1065, 1150) & year>=2003 );

** add up the flags, leaving one row per state and year;
collapse (sum) age_65_or_above_count, by(statefip year);

** the label is attached after the collapse, because collapse gives each result variable a label of the form "(sum) varname",
   which would overwrite a label set beforehand;
label variable age_65_or_above_count "count of accidents amongst ages 65 and above";

** sort the data so that the merge with other data goes OK;
sort statefip year;

** now save the data;
save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Age_65_or_Above_Counts_99_04.dta", replace;












** Combine the four age-group count files made above into one dataset with one row per state and year;
** each merge below matches on statefip and year, keeps only the rows found in both files, and then removes the merge indicator;

use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_0_to_5_Counts_99_04.dta", clear;


** add the counts for ages 6 to 17;
sort statefip year;
merge statefip year using "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_6_to_17_Counts_99_04.dta", unique;
drop if _merge!=3;
drop _merge;


** add the counts for ages 18 to 64;
sort statefip year;
merge statefip year using "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_18_to_64_Counts_99_04.dta", unique;
drop if _merge!=3;
drop _merge;


** add the counts for ages 65 and above;
sort statefip year;
merge statefip year using "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Age_65_or_Above_Counts_99_04.dta", unique;
drop if _merge!=3;

** remove the merge indicator after this last merge too, as after the two merges above, so that it is not saved into the
   combined file and carried along into the append and the export that follow;
drop _merge;


** check the summary statistics to make sure everything looks OK;
summarize;

** drop the observations with statefip==0, and any with statefip==.;
drop if statefip==0;
drop if statefip==.;

summarize;

sort statefip year;

save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/All_Different_Age_Group_Counts_99_04.dta", replace;







** Stack the 1981-1998 counts by age group on top of the 1999-2004 counts saved just above, giving one file for 1981-2004;

use
    "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/All_Different_Age_Group_Counts_81_98.dta",
    clear;
sort statefip year;

append using
    "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/All_Different_Age_Group_Counts_99_04.dta";
sort statefip year;

** look over the combined file, then shrink the storage types before saving;
summarize;
compress;

save
    "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/All_Different_Age_Group_Counts_81_04.dta",
    replace;


** Write the same data out as an Excel-readable XML file, for merging into the raw data in Excel and for easier browsing;

xmlsave
    "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/All_Different_Age_Group_Counts_81_04_XML.xml",
    doctype(excel) replace;


** end of the do file, so close the log;
log close;
